deltas: Heuristically detect endianness for older deltas
author Colin Walters <walters@verbum.org>
Wed, 24 Feb 2016 22:04:04 +0000 (17:04 -0500)
committer Colin Walters <walters@verbum.org>
Fri, 26 Feb 2016 13:19:01 +0000 (08:19 -0500)
If the average object size is greater than 4GiB, let's assume we're
dealing with opposite endianness.  I'm fairly confident no one is
going to be shipping peta- or exa- byte size ostree deltas, period.
Past the gigabyte scale you really want bittorrent or something.

Makefile-tests.am
src/libostree/ostree-repo-static-delta-core.c
src/libostree/ostree-repo-static-delta-private.h
tests/pre-endian-deltas-repo-big.tar.xz [new file with mode: 0644]
tests/pre-endian-deltas-repo-little.tar.xz [new file with mode: 0644]
tests/test-delta.sh

index 475519d787b58ffef6af545e3543c912d61ae7db..6ec583571104b26229c29f0449ad376b24417b29 100644 (file)
@@ -85,6 +85,8 @@ insttest_DATA = tests/archive-test.sh \
        tests/test-basic-user.sh \
        tests/test-local-pull.sh \
        tests/corrupt-repo-ref.js \
+       tests/pre-endian-deltas-repo-big.tar.xz \
+       tests/pre-endian-deltas-repo-little.tar.xz \
        $(NULL)
 
 insttest_SCRIPTS += \
index 97cdb8c5bc6d1352e7ce3e0ba63ec546ed47b49e..0669f691e42593a788ab17bb34cd95fe218d4b9e 100644 (file)
@@ -670,15 +670,22 @@ show_one_part (OstreeRepo                    *self,
 }
 
 OstreeDeltaEndianness
-_ostree_delta_get_endianness (GVariant *superblock)
+_ostree_delta_get_endianness (GVariant *superblock,
+                              gboolean *out_was_heuristic)
 {
   guint8 endianness_char;
   g_autoptr(GVariant) delta_meta = NULL;
   g_autoptr(GVariantDict) delta_metadict = NULL;
+  guint64 total_size = 0;
+  guint64 total_usize = 0;
+  guint total_objects = 0;
 
   delta_meta = g_variant_get_child_value (superblock, 0);
   delta_metadict = g_variant_dict_new (delta_meta);
 
+  if (out_was_heuristic)
+    *out_was_heuristic = FALSE;
+
   if (g_variant_dict_lookup (delta_metadict, "ostree.endianness", "y", &endianness_char))
     {
       switch (endianness_char)
@@ -691,13 +698,58 @@ _ostree_delta_get_endianness (GVariant *superblock)
           return OSTREE_DELTA_ENDIAN_INVALID;
         }
     }
-  return OSTREE_DELTA_ENDIAN_UNKNOWN;
+
+  if (out_was_heuristic)
+    *out_was_heuristic = TRUE;
+
+  { g_autoptr(GVariant) meta_entries = NULL;
+    gboolean looks_swapped;
+    guint n_parts, i;
+
+    g_variant_get_child (superblock, 6, "@a" OSTREE_STATIC_DELTA_META_ENTRY_FORMAT, &meta_entries);
+    n_parts = g_variant_n_children (meta_entries);
+
+    for (i = 0; i < n_parts; i++)
+      {
+        g_autoptr(GVariant) objects = NULL;
+        guint64 size, usize;
+        guint n_objects;
+
+        g_variant_get_child (meta_entries, i, "(u@aytt@ay)", NULL, NULL, &size, &usize, &objects);
+        n_objects = (guint)(g_variant_get_size (objects) / OSTREE_STATIC_DELTA_OBJTYPE_CSUM_LEN);
+
+        total_objects += n_objects;
+        total_size += size;
+        total_usize += usize;
+      }
+
+    /* If the average object size is greater than 4GiB, let's assume
+     * we're dealing with opposite endianness.  I'm fairly confident
+     * no one is going to be shipping peta- or exa- byte size ostree
+     * deltas, period.  Past the gigabyte scale you really want
+     * bittorrent or something.  With zero objects there is no average
+     * (dividing by zero is undefined), so treat the delta as native.
+     */
+    looks_swapped = total_objects > 0 && (total_size / total_objects) > G_MAXUINT32;
+
+    /* Map to the enum; G_BYTE_ORDER itself is not an OstreeDeltaEndianness. */
+    switch (G_BYTE_ORDER)
+      {
+      case G_BIG_ENDIAN:
+        return looks_swapped ? OSTREE_DELTA_ENDIAN_LITTLE : OSTREE_DELTA_ENDIAN_BIG;
+      case G_LITTLE_ENDIAN:
+        return looks_swapped ? OSTREE_DELTA_ENDIAN_BIG : OSTREE_DELTA_ENDIAN_LITTLE;
+      default:
+        g_assert_not_reached ();
+      }
+    return OSTREE_DELTA_ENDIAN_INVALID;
+  }
 }
 
 gboolean
 _ostree_delta_needs_byteswap (GVariant *superblock)
 {
-  switch (_ostree_delta_get_endianness (superblock))
+  switch (_ostree_delta_get_endianness (superblock, NULL))
     {
     case OSTREE_DELTA_ENDIAN_BIG:
       return G_BYTE_ORDER == G_LITTLE_ENDIAN;
@@ -738,24 +790,28 @@ _ostree_repo_static_delta_dump (OstreeRepo                    *self,
 
   g_print ("Delta: %s\n", delta_id);
   { const char *endianness_description;
+    gboolean was_heuristic;
 
-    endianness = _ostree_delta_get_endianness (delta_superblock);
+    endianness = _ostree_delta_get_endianness (delta_superblock, &was_heuristic);
 
     switch (endianness)
       {
       case OSTREE_DELTA_ENDIAN_BIG:
-        endianness_description = "big";
+        if (was_heuristic)
+          endianness_description = "big (heuristic)";
+        else
+          endianness_description = "big";
         if (G_BYTE_ORDER == G_LITTLE_ENDIAN)
           swap_endian = TRUE;
         break;
       case OSTREE_DELTA_ENDIAN_LITTLE:
-        endianness_description = "little";
+        if (was_heuristic)
+          endianness_description = "little (heuristic)";
+        else
+          endianness_description = "little";
         if (G_BYTE_ORDER == G_BIG_ENDIAN)
           swap_endian = TRUE;
         break;
-      case OSTREE_DELTA_ENDIAN_UNKNOWN:
-        endianness_description = "unknown";
-        break;
       case OSTREE_DELTA_ENDIAN_INVALID:
         endianness_description = "invalid";
         break;
index d9e5c456b69ca7d727821e48e15f706795063d35..41ddad48fda0578fd59e5d1834e7b731570dc7e7 100644 (file)
@@ -222,11 +222,10 @@ maybe_swap_endian_u64 (gboolean swap,
 typedef enum {
   OSTREE_DELTA_ENDIAN_BIG,
   OSTREE_DELTA_ENDIAN_LITTLE,
-  OSTREE_DELTA_ENDIAN_UNKNOWN,
   OSTREE_DELTA_ENDIAN_INVALID
 } OstreeDeltaEndianness;
 
-OstreeDeltaEndianness _ostree_delta_get_endianness (GVariant *superblock);
+OstreeDeltaEndianness _ostree_delta_get_endianness (GVariant *superblock, gboolean *out_was_heuristic);
 
 gboolean _ostree_delta_needs_byteswap (GVariant *superblock);
 
diff --git a/tests/pre-endian-deltas-repo-big.tar.xz b/tests/pre-endian-deltas-repo-big.tar.xz
new file mode 100644 (file)
index 0000000..05e51d6
Binary files /dev/null and b/tests/pre-endian-deltas-repo-big.tar.xz differ
diff --git a/tests/pre-endian-deltas-repo-little.tar.xz b/tests/pre-endian-deltas-repo-little.tar.xz
new file mode 100644 (file)
index 0000000..ef05045
Binary files /dev/null and b/tests/pre-endian-deltas-repo-little.tar.xz differ
index 4679ab8e0b9002582b1d96aae0d8c9ea3aa55bcc..ebe35571de56bc81de3ce5b12e6154c699d7c776 100755 (executable)
@@ -128,6 +128,27 @@ assert_streq "${totalsize_orig}" "${totalsize_swapped}"
 
 echo 'ok generate + show endian swapped'
 
+tar xf ${SRCDIR}/pre-endian-deltas-repo-big.tar.xz
+mv pre-endian-deltas-repo{,-big}
+tar xf ${SRCDIR}/pre-endian-deltas-repo-little.tar.xz
+mv pre-endian-deltas-repo{,-little}
+for endian in big little; do
+    # Resolve revisions per repository and show each repo's own legacy delta
+    legacy_origrev=$(${CMD_PREFIX} ostree --repo=pre-endian-deltas-repo-${endian} rev-parse main^)
+    legacy_newrev=$(${CMD_PREFIX} ostree --repo=pre-endian-deltas-repo-${endian} rev-parse main)
+    ${CMD_PREFIX} ostree --repo=pre-endian-deltas-repo-${endian} static-delta show ${legacy_origrev}-${legacy_newrev} > show-legacy-${endian}.txt
+done
+totalsize_legacy_big=$(grep 'Total Size:' show-legacy-big.txt)
+totalsize_legacy_little=$(grep 'Total Size:' show-legacy-little.txt)
+found_heuristic=no
+for f in show-legacy-{big,little}.txt; do
+    if grep 'Endianness:.*heuristic' $f; then found_heuristic=yes; break; fi
+done
+assert_streq "${found_heuristic}" "yes"
+assert_streq "${totalsize_legacy_big}" "${totalsize_legacy_little}"
+
+echo 'ok heuristic endian detection'
+
 mkdir repo2 && ${CMD_PREFIX} ostree --repo=repo2 init --mode=archive-z2
 ${CMD_PREFIX} ostree --repo=repo2 pull-local repo ${newrev}
 ${CMD_PREFIX} ostree --repo=repo2 fsck